
################################################################# 
# Vladimir Chlouba, Jan Pierskalla & Erik Wibbels
# vchlouba@nd.edu
# March 2023
#################################################################
# Historical Exposure to Statehood, Ethnic Exclusion, and 
# Compliance With the State
# Tables Replication R Script
#################################################################

# loading the necessary R packages
library(ggplot2)
library(foreign)
library(data.table)
library(stargazer)
library(lme4)
library(arm)
library(memisc)
library(vegan)
library(MASS)
library(nnet)
library(spikeSlabGAM)
library(speedglm)
library(biglm)
library(ordinal)
library(erer)
library(mnlogit)
library(mlogit)
library(scales)
library(nnet)
library(haven)
library(lfe)
library(GISTools)
library(rgdal)
library(sp)
library(raster)
library(maptools)
library(rgeos)
library(lattice)
library(spdep)
library(cshapes)
library(countrycode)
library(margins)
library(ggmap)
library(dplyr)
library(lmtest)
library(sandwich)

rm(list = ls())
options(stringsAsFactors = FALSE)
#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

# loading the dataset (this has to be changed based on where replication dataset is located)
afro.master <- read.csv("AB_data_replication.csv")

# note that DHS data has to be downloaded after requesting permission from the DHS Program directly
# https://dhsprogram.com/data/

DHS <- read.csv("DHS_data_replication.csv")

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

# creating lists of variables
control_list_grid_pre <- "distw05_new+wateraccess+soil_quality+abslat+mnt2000+avgprec+prec2+avgtemp+malaria+share+status+status*distw05_new"
control_list_grid_post <- "+capdist+bdist1+conflict+lpop+loglightsavg+oil_dum"
control_list_indiv_pre <- "+female+age+age2"
control_list_indiv_post <- "+female+age+age2+education+employed+urban"

dv_list <- c("gov_tax", "gov_courts","gov_law")


######################################################################
# Table 1: Conditional Quasi-Voluntary Compliance
######################################################################

fmla1a <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("| country | 0 | gid")))

fmla1b <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("| country | 0 | gid")))

fmla1c <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("| country + ethnic_id | 0 | gid")))

fmla1d <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("| country + ethnic_id | 0 | gid")))

fmla2a <- as.formula(paste(paste(dv_list[2],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("| country | 0 | gid")))

fmla2b <- as.formula(paste(paste(dv_list[2],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("| country | 0 | gid")))

fmla2c <- as.formula(paste(paste(dv_list[2],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("| country + ethnic_id | 0 | gid")))

fmla2d <- as.formula(paste(paste(dv_list[2],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("| country + ethnic_id | 0 | gid")))

fmla3a <- as.formula(paste(paste(dv_list[3],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("| country | 0 | gid")))

fmla3b <- as.formula(paste(paste(dv_list[3],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("| country | 0 | gid")))

fmla3c <- as.formula(paste(paste(dv_list[3],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("| country + ethnic_id | 0 | gid")))

fmla3d <- as.formula(paste(paste(dv_list[3],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("| country + ethnic_id | 0 | gid")))



# PANEL A

# turning ethnic exclusion into a continuous variable
afro.master$status <- as.numeric(afro.master$status)

# estimate coefficients
m1a <- felm(fmla1a, data=afro.master)
m1b <- felm(fmla1b, data=afro.master)
m1c <- felm(fmla1c, data=afro.master)
m1d <- felm(fmla1d, data=afro.master)
m2a <- felm(fmla2a, data=afro.master)
m2b <- felm(fmla2b, data=afro.master)
m2c <- felm(fmla2c, data=afro.master)
m2d <- felm(fmla2d, data=afro.master)
m3a <- felm(fmla3a, data=afro.master)
m3b <- felm(fmla3b, data=afro.master)
m3c <- felm(fmla3c, data=afro.master)
m3d <- felm(fmla3d, data=afro.master)

# get standard errors
se1a <- m1a$cse
se1b <- m1b$cse
se1c <- m1c$cse
se1d <- m1d$cse
se2a <- m2a$cse
se2b <- m2b$cse
se2c <- m2c$cse
se2d <- m2d$cse
se3a <- m3a$cse
se3b <- m3b$cse
se3c <- m3c$cse
se3d <- m3d$cse


# code for LaTeX table 
stargazer(m1a,m1b, m1c, m1d, m2a,m2b, m2c, m2d, m3a,m3b, m3c, m3d, type="latex",style="qje", 
          title            = "Conditional Quasi-Voluntary Compliance (OLS Estimates)",
          covariate.labels = c("LHCE",
                               "Exclusion",
                               "LHCE*Exclusion"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          omit = c('wateraccess', 'soil_quality', 'abslat', 'mnt2000', 'avgprec', 'prec2', 'avgtemp',
                   'malaria', 'capdist', 'bdist1', 'conflict', 'lpop', 'loglightsavg', 'oil_dum',
                   'female', 'age', 'education', 'employed', 'urban', 'share'),
          omit.stat = c('ser', 'rsq'),
          column.labels   = c("Pay Taxes","Abide Court", "Abide Law"),
          column.separate = c(4, 4, 4),
          se=list(se1a,se1b, se1c, se1d, se2a,se2b, se2c, se2d, se3a, se3b, se3c, se3d),
          add.lines = list(c("Country FE", "\\checkmark", "\\checkmark","\\checkmark", "\\checkmark",
                             "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark",
                             "\\checkmark", "\\checkmark", "\\checkmark"),
                           c("Pre-treatment controls", "\\checkmark", "\\checkmark","\\checkmark", "\\checkmark",
                             "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark",
                             "\\checkmark", "\\checkmark", "\\checkmark"),
                           c("Post-treatment controls", " ", "\\checkmark"," ", "\\checkmark",
                             " ", "\\checkmark", " ", "\\checkmark", " ",
                             "\\checkmark", " ", "\\checkmark"),
                           c("Ethnic Group FE", " ", " ","\\checkmark", "\\checkmark",
                             " ", "", "\\checkmark", "\\checkmark", " ",
                             " ", "\\checkmark", "\\checkmark")),
          notes = c('Standard errors are clustered at the grid cell level.',
                    '$^{***}$p $<$ .01; $^{**}$p $<$ .05; $^{*}$p $<$ .1'),
          notes.align = 'l',
          notes.append = FALSE,
          digits = 2
)



# PANEL B

# turning ethnic exclusion into a factor
afro.master$status <- as.factor(afro.master$status)

# estimate coefficients
m1a <- felm(fmla1a, data=afro.master)
m1b <- felm(fmla1b, data=afro.master)
m1c <- felm(fmla1c, data=afro.master)
m1d <- felm(fmla1d, data=afro.master)
m2a <- felm(fmla2a, data=afro.master)
m2b <- felm(fmla2b, data=afro.master)
m2c <- felm(fmla2c, data=afro.master)
m2d <- felm(fmla2d, data=afro.master)
m3a <- felm(fmla3a, data=afro.master)
m3b <- felm(fmla3b, data=afro.master)
m3c <- felm(fmla3c, data=afro.master)
m3d <- felm(fmla3d, data=afro.master)

# get standard errors
se1a <- m1a$cse
se1b <- m1b$cse
se1c <- m1c$cse
se1d <- m1d$cse
se2a <- m2a$cse
se2b <- m2b$cse
se2c <- m2c$cse
se2d <- m2d$cse
se3a <- m3a$cse
se3b <- m3b$cse
se3c <- m3c$cse
se3d <- m3d$cse


# code for LaTeX table 
stargazer(m1a,m1b, m1c, m1d, m2a,m2b, m2c, m2d, m3a,m3b, m3c, m3d, type="latex",style="qje", 
          title            = "Conditional Quasi-Voluntary Compliance (OLS Estimates)",
          covariate.labels = c("LHCE",
                               "Junior partner",
                               "Powerless",
                               "Discriminated",
                               "LHCE $\\times$ junior partner",
                               "LHCE $\\times$ powerless",
                               "LHCE $\\times$ discriminated"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          omit = c('wateraccess', 'soil_quality', 'abslat', 'mnt2000', 'avgprec', 'prec2', 'avgtemp',
                   'malaria', 'capdist', 'bdist1', 'conflict', 'lpop', 'loglightsavg', 'oil_dum',
                   'female', 'age', 'education', 'employed', 'urban', 'share'),
          omit.stat = c('ser', 'rsq'),
          column.labels   = c("Pay Taxes","Abide Court", "Abide Law"),
          column.separate = c(4, 4, 4),
          se=list(se1a,se1b, se1c, se1d, se2a,se2b, se2c, se2d, se3a, se3b, se3c, se3d),
          add.lines = list(c("Country FE", "\\checkmark", "\\checkmark","\\checkmark", "\\checkmark",
                             "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark",
                             "\\checkmark", "\\checkmark", "\\checkmark"),
                           c("Pre-treatment controls", "\\checkmark", "\\checkmark","\\checkmark", "\\checkmark",
                             "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark",
                             "\\checkmark", "\\checkmark", "\\checkmark"),
                           c("Post-treatment controls", " ", "\\checkmark"," ", "\\checkmark",
                             " ", "\\checkmark", " ", "\\checkmark", " ",
                             "\\checkmark", " ", "\\checkmark"),
                           c("Ethnic Group FE", " ", " ","\\checkmark", "\\checkmark",
                             " ", "", "\\checkmark", "\\checkmark", " ",
                             " ", "\\checkmark", "\\checkmark")),
          notes = c('Standard errors are clustered at the grid cell level.',
                    '$^{***}$p $<$ .01; $^{**}$p $<$ .05; $^{*}$p $<$ .1'),
          notes.align = 'l',
          notes.append = FALSE,
          digits = 2
)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

######################################################################
# Table 2: Vaccination (Difference-In-Differences Estimates)
######################################################################

# loading the DHS dataset (this has to be changed based on where replication dataset is located)
# note that DHS data has to be downloaded after requesting permission from the DHS Program directly
# https://dhsprogram.com/data/

DHS <- read.csv('DHS_data_replication.csv')

# models
m1 <- felm(vaccination ~ Buganda*period | 0 | 0 | CASEID, DHS)
summary(m1)


m2 <- felm(vaccination ~ Buganda*period + urban + delivery_place | 0 | 0 | CASEID, DHS)
summary(m2)


m3 <- felm(vaccination ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
             religion + occupation +  age_mother | 0 | 0 | CASEID, DHS)
summary(m3)


m4 <- felm(vaccination ~ Buganda*period | year | 0 | CASEID, DHS)
summary(m4)


m5 <- felm(vaccination ~ Buganda*period + urban + delivery_place | year | 0 | CASEID, DHS)
summary(m5)


m6 <- felm(vaccination ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
             religion + occupation +  age_mother | year | 0 | CASEID, DHS)
summary(m6)


# code for LaTeX table
stargazer(m1, m2, m3, m4, m5, m6, no.space = T, digits = 2)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

